In [1]:

    
from typing import Sequence

import numpy as np
import tensorflow as tf

Compare speed: Python list vs NumPy operations

Let's implement a standard deviation function and compare the computation time on 10 million numbers.



In [2]:

    
# Number of elements used in the timing comparisons (ten million).
n = 10000000



In [3]:

    
# Implementation using python list
def std(x: Sequence[float]) -> float:
    """Return the population standard deviation of ``x`` in pure Python.

    Args:
        x: A sized, re-iterable collection of numbers (for example a list
            or a ``range``). It is iterated twice: once for the mean and
            once for the squared deviations.

    Returns:
        The square root of the mean squared deviation from the mean.
        The sum is divided by ``len(x)`` (no Bessel correction).

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    count = len(x)
    x_mean = sum(x) / count
    # A generator expression is summed directly, so no temporary list with
    # one entry per input value is built.
    variance = sum((v - x_mean) ** 2 for v in x) / count
    return variance ** 0.5



In [4]:

    
# Time the pure-Python implementation on range(n).
%time std(range(n))









    



CPU times: user 1.32 s, sys: 131 ms, total: 1.45 s
Wall time: 1.45 s






    Out[4]:





2886751.3459482347



In [5]:

    
# Implementation using numpy array function
def std_np(x):
    """Return the population standard deviation of ``x`` using NumPy.

    Args:
        x: Array-like of numbers; it is converted with ``np.asarray``.

    Returns:
        The square root of the mean squared deviation from the mean,
        taken over all elements (normalised by the element count).
    """
    values = np.asarray(x)
    # Use ndarray.mean() instead of np.sum(x) / len(x): len() only counts the
    # first axis, which gives a wrong mean for multi-dimensional input, and
    # mean() accumulates integer input in float64.
    x_mean = values.mean()
    return (((values - x_mean) ** 2).mean()) ** 0.5



In [6]:

    
# Time the vectorised NumPy implementation on an n-element integer array.
%time std_np(np.arange(n))









    



CPU times: user 162 ms, sys: 48.3 ms, total: 211 ms
Wall time: 106 ms






    Out[6]:





2886751.3459480824

As we can see, the NumPy implementation is much faster than the one implemented on a Python list. NumPy also has a built-in function to compute the standard deviation. Verify that all three techniques give the same result.



In [7]:

    
%time np.std(np.arange(int(1e7)))









    



CPU times: user 333 ms, sys: 16.2 ms, total: 349 ms
Wall time: 59 ms






    Out[7]:





2886751.3459480824



In [8]:

    
%%time 
# Same population standard deviation, expressed as a TensorFlow graph
# (graph-mode API: tf.placeholder / tf.Session). The cell magic times the
# whole cell, i.e. graph construction plus the session run.
n_input = tf.placeholder(dtype=tf.float64)  # fed with n when the graph is run
x = tf.range(0, n_input)
x_mean = tf.reduce_mean(x)
# sqrt(mean((x - mean) ** 2))
x_std = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(x, x_mean))))
with tf.Session() as sess:
    print(sess.run([x_std], feed_dict={n_input: n}))









    



[2886751.345948096]
CPU times: user 583 ms, sys: 19.1 ms, total: 602 ms
Wall time: 85.7 ms

Matrix multiplication

Use case: solving the normal equations

http://mlwiki.org/index.php/Normal_Equation



In [9]:

    
# Synthetic linear-regression data: fixed "true" weights and a seeded,
# uniformly random 10x3 design matrix.
np.random.seed(1)
W = np.array([2.3, -5.7, 8.9])  # 1-D weight vector (transposing it is a no-op)
b = 1.2  # bias value; it is not added to y in this cell
X = np.random.random(size=(10, 3))
y = X.dot(W)  # targets are exactly X @ W
print("W: ", W)









    



W:  [ 2.3 -5.7  8.9]



In [10]:

    
# Show the targets computed as the product of X and W.
print("y: ", y)









    



y:  [-3.14568107  0.68066983  1.98993286  4.94832331 -4.29127963  4.13577737
  6.32033813  6.60215509 -2.32666945  6.93726089]



In [11]:

    
# Normal equations: (X^T X) W = X^T y, i.e. W = (X^T X)^-1 X^T y.
# Solve the linear system directly instead of forming the inverse of X^T X:
# the estimate is the same, but solving avoids an explicit matrix inverse,
# which costs more and is numerically less stable.
W_estimate = np.linalg.solve(X.T.dot(X), X.T.dot(y))
W_estimate









    Out[11]:





array([ 2.3, -5.7,  8.9])

Covariance Matrix



In [12]:

    
# Reproducible 5x3 sample of uniform values in [0, 1): 5 observations of
# 3 variables, used for the covariance and decomposition examples.
np.random.seed(1230)
X = np.random.random_sample(size=(5, 3))
X









    Out[12]:





array([[0.27702631, 0.36855193, 0.64431478],
       [0.78019793, 0.50860458, 0.52375554],
       [0.84079088, 0.36703687, 0.67039217],
       [0.83824478, 0.68695113, 0.10454645],
       [0.43739591, 0.312447  , 0.25789323]])



In [13]:

    
# Population covariance by hand: centre each column, then divide X0^T X0 by
# the number of rows.
# The row count gets its own name so that `n` (the benchmark size defined at
# the top of the notebook) is not overwritten when this cell runs.
n_samples = X.shape[0]
X0 = X - np.mean(X, axis=0)
(X0.T).dot(X0) / n_samples









    Out[13]:





array([[ 0.05438659,  0.01918617, -0.00915189],
       [ 0.01918617,  0.01840192, -0.01705645],
       [-0.00915189, -0.01705645,  0.04950637]])



In [14]:

    
# Same covariance matrix via NumPy: rowvar=False treats columns as variables,
# ddof=0 normalises by the number of observations.
np.cov(X, ddof=0, rowvar=False)









    Out[14]:





array([[ 0.05438659,  0.01918617, -0.00915189],
       [ 0.01918617,  0.01840192, -0.01705645],
       [-0.00915189, -0.01705645,  0.04950637]])



In [15]:

    
# Variance of the first column; this is the [0, 0] entry of the ddof=0
# covariance matrix.
np.var(X[:, 0])









    Out[15]:





0.05438658851819618



In [16]:

    
# Two-argument form: 2x2 covariance matrix of the first two columns (ddof=0).
np.cov(X[:,0], X[:,1], ddof=0)









    Out[16]:





array([[0.05438659, 0.01918617],
       [0.01918617, 0.01840192]])

Eigen Value Decomposition

Read about eigendecomposition, SVD, and PCA: https://www.cc.gatech.edu/~dellaert/pubs/svd-note.pdf



In [17]:

    
# No ddof is passed here, so np.cov uses its default normalisation (N - 1).
# The entries therefore differ from the ddof=0 covariance matrices computed
# earlier in the notebook.
cx = np.cov(X, rowvar=False)
cx









    Out[17]:





array([[ 0.06798324,  0.02398272, -0.01143987],
       [ 0.02398272,  0.0230024 , -0.02132056],
       [-0.01143987, -0.02132056,  0.06188297]])



In [18]:

    
# Eigendecomposition of the covariance matrix: e holds the eigenvalues and
# column v[:, i] is the eigenvector belonging to e[i].
# NOTE(review): cx is symmetric, so np.linalg.eigh would also apply; it
# returns eigenvalues in ascending order, so check whether ordering matters.
e, v = np.linalg.eig(cx)
e, v









    Out[18]:





(array([0.09171233, 0.05316521, 0.00799107]),
 array([[-0.7030196 , -0.64191217, -0.3061245 ],
        [-0.42282087,  0.03115894,  0.90567744],
        [ 0.57182686, -0.76614482,  0.29331921]]))



In [19]:

    
# Centre the data: subtract each column's mean so every variable has zero mean.
Z = X - np.mean(X, axis=0)
Z









    Out[19]:





array([[-0.35770485, -0.08016637,  0.20413434],
       [ 0.14546677,  0.05988628,  0.0835751 ],
       [ 0.20605972, -0.08168143,  0.23021173],
       [ 0.20351361,  0.23823282, -0.33563398],
       [-0.19733525, -0.1362713 , -0.1822872 ]])



In [20]:

    
# Sanity check: the column means of the centred data should be ~0
# (up to floating-point error).
Z.mean(axis=0)









    Out[20]:





array([ 2.22044605e-17, -4.44089210e-17, -2.22044605e-17])



In [21]:

    
# Full SVD of the centred data. D is the 1-D array of singular values.
# NOTE: np.linalg.svd returns the right singular vectors already transposed
# (V^H), so the variable named V holds V^T and its rows are the right
# singular vectors.
U, D, V = np.linalg.svd(Z)



In [22]:

    
# Left singular vectors (5x5).
U









    Out[22]:





array([[-0.66387938,  0.15335678, -0.54128459,  0.37391911,  0.32084477],
       [ 0.13174705, -0.33728933, -0.19140748, -0.53739058,  0.73719276],
       [-0.03518989, -0.67481681,  0.38890969,  0.58453227,  0.22462241],
       [ 0.71940261,  0.29042389, -0.30770774,  0.47836284,  0.2731278 ],
       [-0.15208039,  0.56832548,  0.65149012,  0.02978059,  0.47807055]])



In [23]:

    
# Singular values in descending order; D**2 / (number of rows - 1) reproduces
# the eigenvalues of the covariance matrix cx.
D









    Out[23]:





array([0.60568087, 0.46115164, 0.17878555])



In [24]:

    
# Rows of V are the right singular vectors; up to sign they match the
# columns of the eigenvector matrix v of cx.
V









    Out[24]:





array([[ 0.7030196 ,  0.42282087, -0.57182686],
       [-0.64191217,  0.03115894, -0.76614482],
       [ 0.3061245 , -0.90567744, -0.29331921]])

U and V are unitary (orthogonal) matrices



In [25]:

    
# U U^T should be the 5x5 identity (off-diagonal entries ~0 up to
# floating-point error).
U.dot(U.T)









    Out[25]:





array([[ 1.00000000e+00, -1.09255124e-16,  1.25679661e-17,
         1.98321425e-16, -1.01653038e-16],
       [-1.09255124e-16,  1.00000000e+00, -2.54001297e-16,
         9.28867142e-17,  1.24526333e-16],
       [ 1.25679661e-17, -2.54001297e-16,  1.00000000e+00,
         1.18816684e-16,  3.68935290e-16],
       [ 1.98321425e-16,  9.28867142e-17,  1.18816684e-16,
         1.00000000e+00, -1.65612590e-16],
       [-1.01653038e-16,  1.24526333e-16,  3.68935290e-16,
        -1.65612590e-16,  1.00000000e+00]])



In [26]:

    
# V V^T should be the 3x3 identity (off-diagonal entries ~0 up to
# floating-point error).
V.dot(V.T)









    Out[26]:





array([[ 1.00000000e+00, -1.17976663e-16,  4.11484180e-16],
       [-1.17976663e-16,  1.00000000e+00,  1.18705807e-16],
       [ 4.11484180e-16,  1.18705807e-16,  1.00000000e+00]])

The rows of U and V are mutually orthogonal



In [27]:

    
# Dot products between distinct rows of U: each should be ~0.
U[0].dot(U[1]), U[0].dot(U[2]), U[1].dot(U[2])









    Out[27]:





(-1.249000902703301e-16, 2.7755575615628914e-17, -2.7755575615628914e-16)



In [28]:

    
# Dot products between distinct rows of V: each should be ~0.
V[0].dot(V[1]), V[0].dot(V[2]), V[1].dot(V[2])









    Out[28]:





(-1.1102230246251565e-16, 4.163336342344337e-16, 1.1102230246251565e-16)



In [29]:

    
# Zero matrix with the same shape and dtype as X; it is used to hold the
# singular values on its diagonal (the rectangular Sigma of the SVD).
X_0 = np.zeros_like(X)



In [30]:

    
# Write the singular values D onto the main diagonal of X_0.
# np.fill_diagonal modifies X_0 in place.
np.fill_diagonal(X_0, D)
X_0









    Out[30]:





array([[0.60568087, 0.        , 0.        ],
       [0.        , 0.46115164, 0.        ],
       [0.        , 0.        , 0.17878555],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ]])



In [31]:

    
# Reconstruct the centred data as U @ Sigma @ V^T. V is used without a
# transpose because np.linalg.svd already returned it transposed.
# The result should equal Z.
U.dot(X_0).dot(V)









    Out[31]:





array([[-0.35770485, -0.08016637,  0.20413434],
       [ 0.14546677,  0.05988628,  0.0835751 ],
       [ 0.20605972, -0.08168143,  0.23021173],
       [ 0.20351361,  0.23823282, -0.33563398],
       [-0.19733525, -0.1362713 , -0.1822872 ]])



In [ ]: